# Clear the workspace: remove every non-hidden object from the current
# environment before the analysis starts.
# NOTE(review): this does not detach packages or reset options, and it wipes
# the caller's objects if the script is sourced in an interactive session;
# confirm it is still wanted.
rm(list =ls() )
## importamos las librerías
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.4 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(DT)
# Load the smokers CSV straight from GitHub.
ruta <- "https://raw.githubusercontent.com/lacamposm/Fundamentos_Analitica/main/data/fumadores.csv"
# The file is comma-separated with "." as the decimal mark, which is exactly
# what read.csv() assumes by default, so there is no need to call read.csv2()
# and override both of its defaults.
# stringsAsFactors = TRUE turns every character column into a factor.
df <- read.csv(ruta, stringsAsFactors = TRUE)
# Show the first 10 rows as an interactive table.
datatable(head(df, 10))
# Exercise 1: per-column summary of the whole data frame (quartiles and mean
# for numeric columns, level counts for factor columns).
summary(df)
## edad sexo bmi hijos fumador
## Min. :18.00 hombre:676 Min. :15.96 Min. :0.000 no:1064
## 1st Qu.:27.00 mujer :662 1st Qu.:26.30 1st Qu.:0.000 si: 274
## Median :39.00 Median :30.40 Median :1.000
## Mean :39.21 Mean :30.66 Mean :1.095
## 3rd Qu.:51.00 3rd Qu.:34.69 3rd Qu.:2.000
## Max. :64.00 Max. :53.13 Max. :5.000
## region prima
## nor_este :324 Min. : 1122
## nor_oeste:325 1st Qu.: 4740
## sur_este :364 Median : 9382
## sur_oeste:325 Mean :13270
## 3rd Qu.:16640
## Max. :63770
# Exercise 2: bar chart with the number of rows per `sexo` level.
# The y-axis tick labels and all axis ticks are hidden by the theme.
p1 <- ggplot(df, aes(x = sexo)) +
  geom_bar() +
  labs(y = "cantidad") +
  theme(
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )
# Convert the static ggplot into an interactive plotly widget.
ggplotly(p1)
# Exercise 3: minimum, quartiles, mean and maximum of the `prima` column.
summary(df$prima)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1122 4740 9382 13270 16640 63770
# Exercise 4: percentage by which the mean `prima` of smokers
# (fumador == "si") exceeds the mean `prima` of non-smokers (fumador == "no").
p4 <- df %>%
  group_by(fumador) %>%
  summarize(media = mean(prima))
# Index the group means by `fumador` level instead of by row position, so the
# result does not silently depend on the row order of `p4`, and fail loudly if
# either level is missing.
medias <- setNames(p4$media, as.character(p4$fumador))
stopifnot(all(c("no", "si") %in% names(medias)))
# Plain numeric scalar (the original carried a 1x1 data frame around).
resul <- (medias[["si"]] / medias[["no"]] - 1) * 100
resul
## [1] 280.0001
sprintf("la diferencia porcentual es de %f ", resul)
## [1] "la diferencia porcentual es de 280.000146 "
# Round to two decimals for the final message; from here on `resul` is a
# character string, as before.
resul <- as.character(round(resul, 2))
paste(" la diferencia porcentual es de ",resul,"%")
## [1] " la diferencia porcentual es de 280 %"
# Exercise 5: share of smokers and non-smokers within each `sexo` level.
# position_fill() rescales every bar to a total height of 1, so the y axis
# shows a proportion, not a count; the axis label says so (the previous
# "cantidad" label was carried over from the count chart of exercise 2).
p5 <- ggplot(df, aes(x = sexo, fill = fumador)) +
  geom_bar(
    position = position_fill(reverse = TRUE),  # reverse the stacking order
    color = "black"                            # black outline on each segment
  ) +
  labs(y = "proporción") +
  theme(
    axis.text.y = element_blank(),
    axis.ticks = element_blank()
  )
# Convert the static ggplot into an interactive plotly widget.
ggplotly(p5)
# Exercise 6: quartiles and rounded mean of `prima` among smokers
# (fumador == "si"), computed separately for each `sexo` level.
eje6 <- df %>%
  filter(fumador == "si") %>%
  group_by(sexo) %>%
  summarise(
    percel_25 = quantile(prima, probs = 0.25),
    percel_50 = quantile(prima, probs = 0.50),
    percel_75 = quantile(prima, probs = 0.75),
    media = round(mean(prima), 2)
  )
eje6
## # A tibble: 2 × 5
## sexo percel_25 percel_50 percel_75 media
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 hombre 21242. 36085. 41798. 33042.
## 2 mujer 19696. 28950. 40918. 30679
# Keep only the smokers, then draw the density of `prima` with one curve per
# `sexo` level.
fuma <- dplyr::filter(df, fumador == "si")
ggplot(fuma, aes(x = prima, color = sexo)) +
  geom_density()
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# R Markdown template example: summarize the `cars` data set.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.